import torch
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
from PIL import Image
import util

# Load Qwen2-VL-7B-Instruct (a vision-language model) with vLLM.
# Input the model name or path. Can be GPTQ or AWQ models.
# NOTE(review): this runs at import time and loads the full model into GPU
# memory — importing this module has a heavy side effect.
llm = LLM(model="hf-models/Qwen2-VL-7B-Instruct", trust_remote_code=True)


def recognize(image_url, target_text):
    """Run OCR-style recognition on an image and look for *target_text*.

    Args:
        image_url: HTTP(S) URL or local filesystem path of the image.
        target_text: Substring to search for in the model's transcription.

    Returns:
        dict with keys:
            "text":   the model's generated transcription of the image,
            "target_text": the query string, echoed back,
            "found":  True if ``target_text`` occurs in the transcription.
    """
    # Local imports keep the module's top-level dependency list unchanged.
    import io
    from urllib.request import urlopen

    # Load the image: fetch over HTTP(S) if it looks like a URL,
    # otherwise treat it as a local path.
    if image_url.startswith(("http://", "https://")):
        with urlopen(image_url) as resp:
            image = Image.open(io.BytesIO(resp.read())).convert("RGB")
    else:
        image = Image.open(image_url).convert("RGB")

    # Qwen2-VL chat template with a single image placeholder.
    # NOTE(review): template tokens taken from the Qwen2-VL docs — confirm
    # against the tokenizer's chat template if results look off.
    prompt = (
        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
        "<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>"
        "Read and transcribe all text in this image.<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    # Deterministic decoding (temperature=0) so repeated calls on the same
    # image give the same transcription; max_tokens bounds generation length.
    sampling_params = SamplingParams(temperature=0.0, max_tokens=512)

    outputs = llm.generate(
        {"prompt": prompt, "multi_modal_data": {"image": image}},
        sampling_params=sampling_params,
    )
    generated = outputs[0].outputs[0].text

    return {
        "text": generated,
        "target_text": target_text,
        "found": target_text in generated,
    }